---
title: "Figure A.4"
output: 
---

# Figure A.4

# Who's to Blame? Postconflict Violence and Public Attitudes Towards Peace Agreements
# Wyer, Frank. 

#clear environment
```{r clear environment}
# Remove every object that ls() reports in the current environment so the
# chunks below start from an empty workspace. Attached packages are not
# affected by rm().
rm(list = ls())
```

# uncomment and set working directory to replication archive
# setwd("~/blame_replication")

# Uncomment to install packages if necessary
# install.packages("tidyverse")
# install.packages("readxl")
# install.packages("cowplot")
# install.packages("scales")
# install.packages("ggpubfigs")
# devtools::install_github("JLSteenwyk/ggpubfigs")


#load packages
```{r}
library(tidyverse)
library(readxl)
library(cowplot)
library(scales)
library(ggpubfigs)
```

#read in survey data
```{r}
# Load the cleaned survey responses; the path is relative to the
# working directory.
survey_clean <- read.csv(file = "survey_clean.csv")
```

#read in census population data
```{r}
# Load the census workbook, skipping the first 11 rows of the sheet
# before reading the header row.
census_df <- read_xlsx(
  path = "Raw Data Files/census_data_DANE.xlsx",
  skip = 11
)
```

#clean census data
```{r clean census}
# Reshape the 2022 census rows to long format (one row per original row and
# gender/age column) and keep only adults.
census_pop_22 <- census_df %>%
  # keep 2022 rows for the individual area types, not the "Total" rows
  filter(AÑO == 2022, `ÁREA GEOGRÁFICA` != "Total") %>%
  # drop every column whose name contains "Total"
  dplyr::select(!contains("Total")) %>%
  pivot_longer(
    Hombres_0:`Mujeres_100 y más`,
    names_to = "demo",
    values_to = "pop"
  ) %>%
  # the pivoted column names have the form "<gender>_<age>"
  separate_wider_delim(demo, names = c("gender", "age"), delim = "_") %>%
  rename(year = AÑO, areatype = `ÁREA GEOGRÁFICA`) %>%
  # the top age column is labelled "100 y más"; strip the suffix so the
  # value converts to a number
  mutate(age = as.numeric(str_remove(age, " y más"))) %>%
  filter(age > 17) # limit to adult population in 2022
```

#generate population categories to match survey categories
```{r}
# Recode age, area type and gender into numeric category codes.
census_pop_22 <- census_pop_22 %>%
  mutate(
    # age bands: 1 = under 26, 2 = 26-35, 3 = 36-45, 4 = 46-55, 5 = 56-65,
    # 6 = 66 and over. findInterval() counts how many of the lower bounds
    # the age has reached, so adding 1 gives the band number.
    age_cat = findInterval(age, c(26, 36, 46, 56, 66)) + 1,
    # 1 = urban center ("Cabecera"), 2 = rural periphery; any other value
    # becomes NA
    area_cat = as.numeric(
      match(areatype, c("Cabecera", "Centros Poblados y Rural Disperso"))
    ),
    # 1 = male ("Hombres"), 2 = female ("Mujeres"); any other value
    # becomes NA
    sex_cat = as.numeric(match(gender, c("Hombres", "Mujeres")))
  )
```

#aggregate to strata and calculate proportions in population
```{r}
# Total adult population across all rows, extracted as a plain unnamed
# number with pull() (instead of simplifying a 1x1 tibble via as_vector(),
# which yields a named vector).
totpop22 <- census_pop_22 %>%
  ungroup() %>%
  summarise(totpop = sum(pop)) %>%
  pull(totpop)

# Population and population share of each area x age x sex stratum.
# .groups = "drop" returns an ungrouped table, so later steps do not
# silently inherit the area/age grouping that summarise() would otherwise
# leave behind.
census_pop_22 <- census_pop_22 %>%
  group_by(area_cat, age_cat, sex_cat) %>%
  summarise(
    stratapop = sum(pop),
    strataprop = stratapop / totpop22, # share of the total adult population
    .groups = "drop"
  )
```

#relabel age categories for plot
```{r}
# Translate a numeric age-category code (1-6) into its display label.
# Any other value, including a missing one, becomes the string "NA".
age_band_label <- function(code) {
  bands <- c("18-25", "26-35", "36-45", "46-55", "56-65", "65+")
  coalesce(bands[match(code, 1:6)], "NA")
}

# survey: Q15 holds the age-category code
survey_clean <- survey_clean %>%
  mutate(age_cat_verbose = age_band_label(Q15))

# census: age_cat holds the age-category code
census_pop_22 <- census_pop_22 %>%
  mutate(age_cat_verbose = age_band_label(age_cat))
```

#plot age distributions in census and survey
```{r plot age}
# Age distribution in the survey: share of all rows in survey_clean that
# fall in each age label (rows labelled "NA" are counted too).
survey_age_plot <- ggplot(
  survey_clean,
  aes(age_cat_verbose, fill = age_cat_verbose)
) +
  # after_stat() replaces the deprecated ..count.. notation; the bar height
  # is each label's count divided by the total count
  geom_bar(aes(y = after_stat(count / sum(count)))) +
  # same 0-25% axis as the census panel so the two are directly comparable
  scale_y_continuous(limits = c(0, .25), labels = percent_format()) +
  scale_fill_manual(name = "", values = friendly_pal("ito_seven")) +
  labs(
    title = "Survey Respondents",
    x = "",
    y = "",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Age distribution in the census: strata populations summed per age label
# and divided by the total adult population.
census_age_plot <- census_pop_22 %>%
  group_by(age_cat_verbose) %>%
  summarise(age_prop = sum(stratapop) / totpop22) %>%
  ggplot(aes(age_cat_verbose, age_prop, fill = age_cat_verbose)) +
  # geom_col() draws bars at the supplied y values
  # (equivalent to geom_bar(stat = "identity"))
  geom_col() +
  scale_y_continuous(limits = c(0, .25), labels = percent_format()) +
  scale_fill_manual(name = "", values = friendly_pal("ito_seven")) +
  labs(
    title = "Census Projection",
    x = "",
    y = "",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Combine into a single figure: census on the left, survey on the right.
age_plot <- plot_grid(census_age_plot, survey_age_plot)
```
